import numpy as np
import math
import CA_UCB
import initialization
import exploration
import preliminary
import exploitation
import communication
import matplotlib.pyplot as plt
import Phased_ETC
import pickle
def ML_ETC(K, M, T, preference, value):
    """Run the multi-level ETC simulation and return its regret curve.

    The agents are processed level by level.  At each level the current
    ``leader`` group (as reported by ``preliminary.whether_leader``) keeps
    exploring until every success flag is non-zero, is then assigned arms by
    ``communication.GS_arm``, and is finally removed from the pool together
    with the arms it was assigned.  This repeats until no agent is left.

    Args:
        K: Number of arms (arms are indexed ``0 .. K-1``).
        M: Number of agents (agents are indexed ``0 .. M-1``).
        T: Horizon; it sizes the regret curve and is passed through to the
            exploration helpers.
        preference: Preference structure forwarded unchanged to the helper
            modules (its layout is defined by those modules).
        value: Array indexed as ``value[agent, arm]`` giving the value an
            agent obtains from an arm.

    Returns:
        A 1-D float array of length ``int(T / 100000) + 1`` holding the
        regret at each recorded point; entry 0 is always 0.
    """
    # Spacing used for the reward/regret curve.  NOTE(review): the helper
    # modules presumably record ``reward`` with this same spacing -- confirm
    # before changing it.
    step = 100000

    t_total = np.zeros((M, K), int)
    t_total_collision = np.zeros((M, K), int)
    information = np.zeros((M, K, M), int) - 1  # -1 everywhere to start with
    agent = list(np.arange(M))
    arm = list(np.arange(K))
    match = preliminary.stable_matching(agent, value, preference, M, K)
    u = np.zeros((M, K))
    time = np.zeros((M, K), int)
    dot_number = int(T / step) + 1
    regret = np.zeros(dot_number)
    reward = np.zeros(dot_number)

    information, t_total, t_total_collision = initialization.index_assignment(
        preference, M, K, information, t_total, t_total_collision)
    information, t_total, t_total_collision = initialization.information_access(
        preference, M, K, information, t_total, t_total_collision)

    # ---- First level: all agents and all arms are still in play. ----
    leader = preliminary.whether_leader(information, agent[0], arm, agent, M)
    follower = list({i for i in agent if i not in leader})
    # Sanity check: every agent should derive the same leader group.
    for m in agent:
        if leader != preliminary.whether_leader(information, m, arm, agent, M):
            print(0)

    success = np.zeros(M, int)
    success, t_total, t_total_collision, reward = communication.success_information(
        leader, follower, arm, success, preference, information, M,
        t_total, t_total_collision, agent, reward, K, value)
    while (success == 0).any():
        u, time, t_total, t_total_collision, reward = exploration.exploration(
            arm, agent, preference, u, T, time, value, M, t_total,
            t_total_collision, reward)
        success = np.zeros(M, int)
        for m in range(M):
            success[m] = exploration.whether_success(m, u, time, arm, T)
        success, t_total, t_total_collision, reward = communication.success_information(
            leader, follower, arm, success, preference, information, M,
            t_total, t_total_collision, agent, reward, K, value)

    pull, arm, t_total, t_total_collision, reward = communication.GS_arm(
        information, u, leader, preference, arm, follower, M, agent, K,
        t_total, t_total_collision, reward, value)
    # Settled leaders leave the pool, and so do the arms they were assigned.
    for i in leader:
        agent.remove(i)
        if pull[i] in arm:
            arm.remove(pull[i])

    # ---- Later levels: repeat on the remaining agents and arms. ----
    while agent:
        leader = preliminary.whether_leader(information, agent[0], arm, agent, M)
        for m in agent:
            if leader != preliminary.whether_leader(information, m, arm, agent, M):
                print(0)
        # The new leaders stop being followers.
        for i in leader:
            follower.remove(i)

        # Agents already removed keep the flag 1 so that only the remaining
        # agents can hold the loop below open.
        success = np.ones(M, int)
        for m in agent:
            success[m] = exploration.whether_success(m, u, time, arm, T)
        success, t_total, t_total_collision, reward = communication.success_information(
            leader, follower, arm, success, preference, information, M,
            t_total, t_total_collision, agent, reward, K, value)
        while (success == 0).any():
            u, time, t_total, t_total_collision, reward = exploration.exploration(
                arm, agent, preference, u, T, time, value, M, t_total,
                t_total_collision, reward)
            success = np.ones(M, int)
            for m in agent:
                success[m] = exploration.whether_success(m, u, time, arm, T)
            success, t_total, t_total_collision, reward = communication.success_information(
                leader, follower, arm, success, preference, information, M,
                t_total, t_total_collision, agent, reward, K, value)

        pull, arm, t_total, t_total_collision, reward = communication.GS_arm(
            information, u, leader, preference, arm, follower, M, agent, K,
            t_total, t_total_collision, reward, value)
        for i in leader:
            # Remove each leader individually.  Passing the whole ``leader``
            # collection to ``list.remove`` (as was done before) fails as soon
            # as a level has more than one leader.
            agent.remove(i)
            if pull[i] in arm:
                arm.remove(pull[i])

    # ---- Regret curve. ----
    # Total value of the reference matching returned by stable_matching.
    optimal_reward = 0
    for i in range(M):
        optimal_reward = optimal_reward + value[i, match[i]]

    for j in range(dot_number):
        if reward[j] != 0:
            regret[j] = optimal_reward * j * step - reward[j]
        else:
            # No reward was recorded for this point: derive the regret from
            # the accumulated counters instead.
            earn = 0
            t = np.zeros(M)
            for i in range(M):
                for k in range(K):
                    earn = t_total[i, k] * value[i, k] + earn
                    t[i] = t[i] + t_total_collision[i, k]
            regret[j] = 0 - earn
            for i in range(M):
                if T < t[i]:
                    # Diagnostic: the counter for agent i exceeds the horizon.
                    print('fail')
                regret[j] = (t[i]) * (value[i, match[i]]) + regret[j]
    regret[0] = 0
    return regret